**************
** Figure 4 **
**************

global db "D:\Dropbox\unequal_gains\main_data"
global Section4 "D:\Dropbox\unequal_gains\QJE revision plan\analysis\section4_data"
global resultspath "D:\Dropbox\unequal_gains\QJE revision plan\analysis\clean_results"
global Appendixresultspath "D:\Dropbox\unequal_gains\QJE revision plan\analysis\clean_results\appendix"


* 1. Data preparation

* first get average consumer income by product module -- price decile (from HMS)
clear 
set obs 1
gen product_module_code=.
save "$Section4/consumerincome_QM", replace

foreach i of numlist 2004(1)2015 {

use "$db/Important Price Datasets/price_index_household_income_`i'_2004_modules.dta", clear

* bring in info on UPC price decile (within product module) in base period:
merge m:1 upc upc_ver_uc using "$db/Important Lists/upc_brand_size_final.dta"
keep if _merge==3
drop _merge
sum size1_amount, d
drop if size1_amount<r(p1) | size1_amount>r(p99)

* get income
drop if missing(household_income)
gen household_income_v2=2500 if household_income==3
replace household_income_v2=6500 if household_income==4
replace household_income_v2=9000   if household_income==6
replace household_income_v2=11000  if household_income==8
replace household_income_v2=13500  if household_income==10
replace household_income_v2=17500  if household_income==11
replace household_income_v2=22500  if household_income==13
replace household_income_v2=27500  if household_income==15
replace household_income_v2=32500  if household_income==16
replace household_income_v2=37500  if household_income==17
replace household_income_v2=42500  if household_income==18
replace household_income_v2=47500  if household_income==19
replace household_income_v2=55000  if household_income==21
replace household_income_v2=65000  if household_income==23
replace household_income_v2=85000  if household_income==26
replace household_income_v2=150000 if household_income==27
replace household_income_v2=150000 if household_income>27

* create average_unit_price_all for consistency with other file
bysort upc upc_ver_uc: egen double temp1=sum(projection_factor*total_quantity)
bysort upc upc_ver_uc: egen double temp2=sum(projection_factor*total_spending)
gen average_unit_price_all=temp2/temp1
drop temp1 temp2
gen average_unit_price_adj=average_unit_price_all/(size1_amount*multi)

* measure income at the UPC level so that we can build the quality ranks based on UPC-level data 
* (rather than UPC*number of income groups)
bysort upc upc_ver_uc: egen double totalspending_upc = sum(projection_factor*total_spending)
bysort upc upc_ver_uc: egen double incspending_upc= sum(projection_factor*total_spending*household_income_v2)
keep upc upc_ver_uc average_unit_price_adj product_module_code totalspending_upc incspending_upc
duplicates drop 

* create quality ranks without weights
sort product_module_code average_unit_price_adj
bysort product_module_code: gen double temp=[_N]
bysort product_module_code: gen double temp2=[_n]
gen rank=temp2/temp*100
drop temp temp2
gen quality_rank=.
foreach r of numlist 1(1)10 {
replace quality_rank=`r' if rank<=`r'*10 & missing(quality_rank)
}

* now bring to module by quality rank level
bysort product_module_code quality_rank: egen double inc_totalspen_QM = sum(incspending_upc)
bysort product_module_code quality_rank: egen double totalspen_QM = sum(totalspending_upc)

gen double consumer_income_salesw=inc_totalspen_QM/totalspen_QM
keep product_module_code quality_rank consumer_income_salesw inc_totalspen_QM totalspen_QM
duplicates drop
gen year=`i'
append using "$Section4/consumerincome_QM"
drop if missing(product_module_code)
save "$Section4/consumerincome_QM", replace

display "DONE WITH YEAR `i'"
}

* now bring to outcome data & finalize variables
use "$Section4/consumerincome_QM", clear
collapse (sum) inc_totalspen_QM totalspen_QM, by(product_module_code quality)
gen double consumer_income_salesw=inc_totalspen_QM/totalspen_QM
merge 1:1 product_module_code quality_rank using  "$Section4/outcomes.dta"
drop _merge

* for RMS, use all years except 2010
gen RMS_avg_spending = (RMS_total_spending_QM_2006 + RMS_total_spending_QM_2007 + RMS_total_spending_QM_2008 ///
 + RMS_total_spending_QM_2009 + RMS_total_spending_QM_2011 + RMS_total_spending_QM_2012 ///
 + RMS_total_spending_QM_2013 + RMS_total_spending_QM_2014 + RMS_total_spending_QM_2015)/9 

gen RMS_cum_spending_growth = (RMS_total_spending_QM_2014+RMS_total_spending_QM_2015)/(RMS_total_spending_QM_2006+RMS_total_spending_QM_2007) 
gen RMS_avg_spending_growth_pc = (RMS_cum_spending_growth^(1/(2015-2007)) - 1)*100
sum RMS_avg_spending_growth_pc [aw=RMS_avg_spending], d

gen RMS_avg_infl_full = (RMS_fullces_pi_win_common-1)*100
sum RMS_avg_infl_full [aw=RMS_avg_spending], d

gen RMS_avg_realspending_growth_pc = RMS_avg_spending_growth_pc - RMS_avg_infl_full
sum RMS_avg_realspending_growth_pc [aw=RMS_avg_spending], d
gen p5_realgrowth=r(p5)
gen p95_realgrowth=r(p95)

gen RMS_avg_tornqvist_infl = (RMS_tornqvist_price_index_win - 1)*100
gen RMS_avg_ces_infl = (RMS_ces_price_index_win - 1)*100

* keep track of variables in average log change (for IV)
gen RMS_cum_Dlog_spending = log( (RMS_total_spending_QM_2014+RMS_total_spending_QM_2015)/2 ) ///
- log( (RMS_total_spending_QM_2006+RMS_total_spending_QM_2007)/2 )
gen RMS_avg_Dlog_spending = RMS_cum_Dlog_spending/(2015-2007)
sum RMS_avg_Dlog_spending [aw=RMS_avg_spending], d

gen RMS_avg_Dlog_realspending = RMS_avg_Dlog_spending - RMS_lfullces_pi_win_common
sum RMS_avg_Dlog_realspending [aw=RMS_avg_spending], d
gen p5_Dlog_realspending=r(p5)
gen p95_Dlog_realspending=r(p95)


* 2. Main figures with RMS data

* Panel (a)
reg RMS_feenstra_win RMS_avg_realspending_growth_pc [aw=log(RMS_avg_spending)] ///
if inrange(RMS_avg_realspending_growth_pc,p5_realgrowth,p95_realgrowth), cluster(product_module_code)
matrix b=e(b) 
local b: di %6.5f `=b[1,1]'
matrix var=e(V)
matrix sd = sqrt(var[1,1])
local se: di %6.5f `=sd[1,1]'
display "`b'"
display "`se'"

binscatter RMS_feenstra_win RMS_avg_realspending_growth_pc [aw=log(RMS_avg_spending)] ///
if inrange(RMS_avg_realspending_growth_pc,p5_realgrowth,p95_realgrowth), reportreg nq(100) ///
xtitle("Annualized Real Spending Growth, 2006-2015 (%)") ylabel(,angle(0)) ///
ytitle ("Average Feenstra Ratio, 2006-2015") ///
graphregion(color(white)) note("Coeff. `b' ***" "s.e. `se'", span ring(0) pos(12) size(med) box bfc(white))
graph export "$resultspath/Figure4a.pdf", as(pdf) replace
graph save "$resultspath/Figure4a.gph", replace

* Panel (b)
reg RMS_avg_ces_infl RMS_avg_realspending_growth_pc [aw=log(RMS_avg_spending)] ///
if inrange(RMS_avg_realspending_growth_pc,p5_realgrowth,p95_realgrowth), cluster(product_module_code)
matrix b=e(b) 
local b: di %6.5f `=b[1,1]'
matrix var=e(V)
matrix sd = sqrt(var[1,1])
local se: di %6.5f `=sd[1,1]'
display "`b'"
display "`se'"

binscatter RMS_avg_ces_infl RMS_avg_realspending_growth_pc [aw=log(RMS_avg_spending)] ///
if inrange(RMS_avg_realspending_growth_pc,p5_realgrowth,p95_realgrowth), nq(100) reportreg ///
xtitle("Annualized Real Spending Growth, 2006-2015 (%)") ylabel(,angle(0)) ///
ytitle ("Average CES Inflation" "for Continued Products, 2006-2015 (%)") ///
graphregion(color(white)) note("Coeff. `b' ***" "s.e. `se'", span ring(0) pos(12) size(med) box bfc(white))
graph export "$resultspath/Figure4b.pdf", as(pdf) replace
graph save "$resultspath/Figure4b.gph", replace

* Panel (c)
reg RMS_avg_realspending_growth_pc consumer_income_salesw [aw=log(RMS_avg_spending)], cluster(product_module_code)
matrix b=e(b) 
local b: di %6.5f `=b[1,1]'
matrix var=e(V)
matrix sd = sqrt(var[1,1])
local se: di %7.6f `=sd[1,1]'
display "`b'"
display "`se'"

binscatter RMS_avg_realspending_growth_pc consumer_income_salesw [aw=log(RMS_avg_spending)] ///
, nq(100) reportreg ///
xtitle("Sales-Weighted Consumer Income in Product Module - Price Decile," "2004-2015 ($)") ylabel(,angle(0)) ///
ytitle ("Annualized Real Spending Growth, 2006-2015 (%)") ///
graphregion(color(white)) note("Coeff. `b' ***" "s.e. `se'", span ring(0) pos(12) size(med) box bfc(white))
graph export "$resultspath/Figure4c.pdf", as(pdf) replace
graph save "$resultspath/Figure4c.gph", replace

* Panel (d)  
sum RMS_feenstra_win [aw=RMS_avg_spending], d
gen p5_RMSfeenstra=r(p5)
gen p95_RMSfeestra=r(p95)

reg RMS_avg_ces_infl RMS_feenstra_win [aw=log(RMS_avg_spending)] ///
if inrange(RMS_feenstra_win,p5_RMSfeenstra,p95_RMSfeestra), cluster(product_module_code)
matrix b=e(b) 
local b: di %6.5f `=b[1,1]'
matrix var=e(V)
matrix sd = sqrt(var[1,1])
local se: di %6.5f `=sd[1,1]'
display "`b'"
display "`se'"

binscatter RMS_avg_ces_infl RMS_feenstra_win [aw=log(RMS_avg_spending)] ///
if inrange(RMS_feenstra_win,p5_RMSfeenstra,p95_RMSfeestra), nq(100) reportreg ///
xtitle("Average Feenstra Ratio, 2006-2015") ylabel(,angle(0)) ///
ytitle ("Average CES Inflation" "for Continued Products, 2006-2015 (%)") ///
graphregion(color(white)) note("Coeff. `b' ***" "s.e. `se'", span ring(0) pos(12) size(med) box bfc(white))
graph export "$resultspath/Figure4d.pdf", as(pdf) replace
graph save "$resultspath/Figure4d.gph", replace

* Panel (e)
reg RMS_feenstra_win quality_rank [aw=log(RMS_avg_spending)], cluster(product_module_code)
matrix b=e(b) 
local b: di %6.5f `=b[1,1]'
matrix var=e(V)
matrix sd = sqrt(var[1,1])
local se: di %6.5f `=sd[1,1]'
display "`b'"
display "`se'"

binscatter RMS_feenstra_win quality_rank [aw=log(RMS_avg_spending)], nq(100) reportreg ///
xtitle("Price Decile (within Product Module)") ylabel(,angle(0)) xlabel(1(1)10) ///
ytitle ("Average Feenstra Ratio, 2006-2015") ///
graphregion(color(white)) note("Coeff. `b' ***" "s.e. `se'", span ring(0) pos(12) size(med) box bfc(white))
graph export "$resultspath/Figure4e.pdf", as(pdf) replace
graph save "$resultspath/Figure4e.gph", replace
 
* Panel (f) 
reg RMS_avg_ces_infl quality_rank [aw=log(RMS_avg_spending)], cluster(product_module_code)
matrix b=e(b) 
local b: di %6.5f `=b[1,1]'
matrix var=e(V)
matrix sd = sqrt(var[1,1])
local se: di %6.5f `=sd[1,1]'
display "`b'"
display "`se'"

binscatter RMS_avg_ces_infl quality_rank [aw=log(RMS_avg_spending)], reportreg ///
xtitle("Price Decile (within Product Module)") ylabel(,angle(0)) xlabel(1(1)10) ///
ytitle ("Average CES Inflation" "for Continued Products, 2006-2015 (%)") ///
graphregion(color(white)) note("Coeff. `b' ***" "s.e. `se'", span ring(0) pos(12) size(med) box bfc(white))
graph export "$resultspath/Figure4f.pdf", as(pdf) replace
graph save "$resultspath/Figure4f.gph", replace 
 